Develop a Shiny app that visualizes the progression of the 2019-20 Global Coronavirus Outbreak.
If you are ambitious, bring in visualization of other types of data, e.g., stock market, tweets, contrast with previous outbreak (e.g., 2003 SARS, 2009 H1N1, 2012 MERS, 2014 Ebola), prediction from your statistical/epidemiological model, and so on.
Publish your Shiny app to https://www.shinyapps.io and share the link.
(Optinal) Submit your app to the 2020 Shiny Contest (deadline 20 March 2020 at 5pm ET).
Below is some data exploration, which may help you get started. You do not have to use it. Note below code may change frequently.
Since Feb 11, JHU SSE is hosting 2019 nCoV data on GitHub repo: https://github.com/CSSEGISandData/2019-nCoV.
Let’s import the time series data directly from the csv file on GitHub:
(confirmed <- read_csv("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Confirmed.csv"))
## Parsed with column specification:
## cols(
## .default = col_double(),
## `Province/State` = col_character(),
## `Country/Region` = col_character()
## )
## See spec(...) for full column specifications.
## # A tibble: 94 x 39
## `Province/State` `Country/Region` Lat Long `1/22/20` `1/23/20` `1/24/20`
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Anhui Mainland China 31.8 117. 1 9 15
## 2 Beijing Mainland China 40.2 116. 14 22 36
## 3 Chongqing Mainland China 30.1 108. 6 9 27
## 4 Fujian Mainland China 26.1 118. 1 5 10
## 5 Gansu Mainland China 36.1 104. 0 2 2
## 6 Guangdong Mainland China 23.3 113. 26 32 53
## 7 Guangxi Mainland China 23.8 109. 2 5 23
## 8 Guizhou Mainland China 26.8 107. 1 3 3
## 9 Hainan Mainland China 19.2 110. 4 5 8
## 10 Hebei Mainland China 38.0 115. 1 1 2
## # … with 84 more rows, and 32 more variables: `1/25/20` <dbl>, `1/26/20` <dbl>,
## # `1/27/20` <dbl>, `1/28/20` <dbl>, `1/29/20` <dbl>, `1/30/20` <dbl>,
## # `1/31/20` <dbl>, `2/1/20` <dbl>, `2/2/20` <dbl>, `2/3/20` <dbl>,
## # `2/4/20` <dbl>, `2/5/20` <dbl>, `2/6/20` <dbl>, `2/7/20` <dbl>,
## # `2/8/20` <dbl>, `2/9/20` <dbl>, `2/10/20` <dbl>, `2/11/20` <dbl>,
## # `2/12/20` <dbl>, `2/13/20` <dbl>, `2/14/20` <dbl>, `2/15/20` <dbl>,
## # `2/16/20` <dbl>, `2/17/20` <dbl>, `2/18/20` <dbl>, `2/19/20` <dbl>,
## # `2/20/20` <dbl>, `2/21/20` <dbl>, `2/22/20` <dbl>, `2/23/20` <dbl>,
## # `2/24/20` <dbl>, `2/25/20` <dbl>
(recovered <- read_csv("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Recovered.csv"))
## Parsed with column specification:
## cols(
## .default = col_double(),
## `Province/State` = col_character(),
## `Country/Region` = col_character()
## )
## See spec(...) for full column specifications.
## # A tibble: 94 x 39
## `Province/State` `Country/Region` Lat Long `1/22/20` `1/23/20` `1/24/20`
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Anhui Mainland China 31.8 117. 0 0 0
## 2 Beijing Mainland China 40.2 116. 0 0 1
## 3 Chongqing Mainland China 30.1 108. 0 0 0
## 4 Fujian Mainland China 26.1 118. 0 0 0
## 5 Gansu Mainland China 36.1 104. 0 0 0
## 6 Guangdong Mainland China 23.3 113. 0 2 2
## 7 Guangxi Mainland China 23.8 109. 0 0 0
## 8 Guizhou Mainland China 26.8 107. 0 0 0
## 9 Hainan Mainland China 19.2 110. 0 0 0
## 10 Hebei Mainland China 38.0 115. 0 0 0
## # … with 84 more rows, and 32 more variables: `1/25/20` <dbl>, `1/26/20` <dbl>,
## # `1/27/20` <dbl>, `1/28/20` <dbl>, `1/29/20` <dbl>, `1/30/20` <dbl>,
## # `1/31/20` <dbl>, `2/1/20` <dbl>, `2/2/20` <dbl>, `2/3/20` <dbl>,
## # `2/4/20` <dbl>, `2/5/20` <dbl>, `2/6/20` <dbl>, `2/7/20` <dbl>,
## # `2/8/20` <dbl>, `2/9/20` <dbl>, `2/10/20` <dbl>, `2/11/20` <dbl>,
## # `2/12/20` <dbl>, `2/13/20` <dbl>, `2/14/20` <dbl>, `2/15/20` <dbl>,
## # `2/16/20` <dbl>, `2/17/20` <dbl>, `2/18/20` <dbl>, `2/19/20` <dbl>,
## # `2/20/20` <dbl>, `2/21/20` <dbl>, `2/22/20` <dbl>, `2/23/20` <dbl>,
## # `2/24/20` <dbl>, `2/25/20` <dbl>
(death <- read_csv("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Deaths.csv"))
## Parsed with column specification:
## cols(
## .default = col_double(),
## `Province/State` = col_character(),
## `Country/Region` = col_character()
## )
## See spec(...) for full column specifications.
## # A tibble: 94 x 39
## `Province/State` `Country/Region` Lat Long `1/22/20` `1/23/20` `1/24/20`
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Anhui Mainland China 31.8 117. 0 0 0
## 2 Beijing Mainland China 40.2 116. 0 0 0
## 3 Chongqing Mainland China 30.1 108. 0 0 0
## 4 Fujian Mainland China 26.1 118. 0 0 0
## 5 Gansu Mainland China 36.1 104. 0 0 0
## 6 Guangdong Mainland China 23.3 113. 0 0 0
## 7 Guangxi Mainland China 23.8 109. 0 0 0
## 8 Guizhou Mainland China 26.8 107. 0 0 0
## 9 Hainan Mainland China 19.2 110. 0 0 0
## 10 Hebei Mainland China 38.0 115. 0 1 1
## # … with 84 more rows, and 32 more variables: `1/25/20` <dbl>, `1/26/20` <dbl>,
## # `1/27/20` <dbl>, `1/28/20` <dbl>, `1/29/20` <dbl>, `1/30/20` <dbl>,
## # `1/31/20` <dbl>, `2/1/20` <dbl>, `2/2/20` <dbl>, `2/3/20` <dbl>,
## # `2/4/20` <dbl>, `2/5/20` <dbl>, `2/6/20` <dbl>, `2/7/20` <dbl>,
## # `2/8/20` <dbl>, `2/9/20` <dbl>, `2/10/20` <dbl>, `2/11/20` <dbl>,
## # `2/12/20` <dbl>, `2/13/20` <dbl>, `2/14/20` <dbl>, `2/15/20` <dbl>,
## # `2/16/20` <dbl>, `2/17/20` <dbl>, `2/18/20` <dbl>, `2/19/20` <dbl>,
## # `2/20/20` <dbl>, `2/21/20` <dbl>, `2/22/20` <dbl>, `2/23/20` <dbl>,
## # `2/24/20` <dbl>, `2/25/20` <dbl>
I want to tidy data into the long format. pivot_longer is the modern version of gather function in dplyr.
confirmed_long <- confirmed %>%
pivot_longer(-(`Province/State`:Long),
names_to = "Date",
values_to = "confirmed") %>%
mutate(Date = (mdy(Date))) # convert string to date-time
confirmed_long
## # A tibble: 3,290 x 6
## `Province/State` `Country/Region` Lat Long Date confirmed
## <chr> <chr> <dbl> <dbl> <date> <dbl>
## 1 Anhui Mainland China 31.8 117. 2020-01-22 1
## 2 Anhui Mainland China 31.8 117. 2020-01-23 9
## 3 Anhui Mainland China 31.8 117. 2020-01-24 15
## 4 Anhui Mainland China 31.8 117. 2020-01-25 39
## 5 Anhui Mainland China 31.8 117. 2020-01-26 60
## 6 Anhui Mainland China 31.8 117. 2020-01-27 70
## 7 Anhui Mainland China 31.8 117. 2020-01-28 106
## 8 Anhui Mainland China 31.8 117. 2020-01-29 152
## 9 Anhui Mainland China 31.8 117. 2020-01-30 200
## 10 Anhui Mainland China 31.8 117. 2020-01-31 237
## # … with 3,280 more rows
recovered_long <- recovered %>%
pivot_longer(-(`Province/State`:Long),
names_to = "Date",
values_to = "recovered") %>%
mutate(Date = mdy(Date))
recovered_long
## # A tibble: 3,290 x 6
## `Province/State` `Country/Region` Lat Long Date recovered
## <chr> <chr> <dbl> <dbl> <date> <dbl>
## 1 Anhui Mainland China 31.8 117. 2020-01-22 0
## 2 Anhui Mainland China 31.8 117. 2020-01-23 0
## 3 Anhui Mainland China 31.8 117. 2020-01-24 0
## 4 Anhui Mainland China 31.8 117. 2020-01-25 0
## 5 Anhui Mainland China 31.8 117. 2020-01-26 0
## 6 Anhui Mainland China 31.8 117. 2020-01-27 0
## 7 Anhui Mainland China 31.8 117. 2020-01-28 0
## 8 Anhui Mainland China 31.8 117. 2020-01-29 2
## 9 Anhui Mainland China 31.8 117. 2020-01-30 2
## 10 Anhui Mainland China 31.8 117. 2020-01-31 3
## # … with 3,280 more rows
death_long <- death %>%
pivot_longer(-(`Province/State`:Long),
names_to = "Date",
values_to = "death") %>%
mutate(Date = mdy(Date))
death_long
## # A tibble: 3,290 x 6
## `Province/State` `Country/Region` Lat Long Date death
## <chr> <chr> <dbl> <dbl> <date> <dbl>
## 1 Anhui Mainland China 31.8 117. 2020-01-22 0
## 2 Anhui Mainland China 31.8 117. 2020-01-23 0
## 3 Anhui Mainland China 31.8 117. 2020-01-24 0
## 4 Anhui Mainland China 31.8 117. 2020-01-25 0
## 5 Anhui Mainland China 31.8 117. 2020-01-26 0
## 6 Anhui Mainland China 31.8 117. 2020-01-27 0
## 7 Anhui Mainland China 31.8 117. 2020-01-28 0
## 8 Anhui Mainland China 31.8 117. 2020-01-29 0
## 9 Anhui Mainland China 31.8 117. 2020-01-30 0
## 10 Anhui Mainland China 31.8 117. 2020-01-31 0
## # … with 3,280 more rows
ncov_tbl <- confirmed_long %>%
left_join(recovered_long) %>%
left_join(death_long) %>%
pivot_longer(confirmed:death,
names_to = "Case",
values_to = "Count")
## Joining, by = c("Province/State", "Country/Region", "Lat", "Long", "Date")
## Joining, by = c("Province/State", "Country/Region", "Lat", "Long", "Date")
ncov_tbl %>% print(width = Inf)
## # A tibble: 9,870 x 7
## `Province/State` `Country/Region` Lat Long Date Case Count
## <chr> <chr> <dbl> <dbl> <date> <chr> <dbl>
## 1 Anhui Mainland China 31.8 117. 2020-01-22 confirmed 1
## 2 Anhui Mainland China 31.8 117. 2020-01-22 recovered 0
## 3 Anhui Mainland China 31.8 117. 2020-01-22 death 0
## 4 Anhui Mainland China 31.8 117. 2020-01-23 confirmed 9
## 5 Anhui Mainland China 31.8 117. 2020-01-23 recovered 0
## 6 Anhui Mainland China 31.8 117. 2020-01-23 death 0
## 7 Anhui Mainland China 31.8 117. 2020-01-24 confirmed 15
## 8 Anhui Mainland China 31.8 117. 2020-01-24 recovered 0
## 9 Anhui Mainland China 31.8 117. 2020-01-24 death 0
## 10 Anhui Mainland China 31.8 117. 2020-01-25 confirmed 39
## # … with 9,860 more rows
# prepare coronavirus data for shiny app
saveRDS(ncov_tbl, file = "../2019-20Coronavirus/ncov_tbl.rds")
A self note about installing sf package on CentOS 7: https://gist.github.com/Hua-Zhou/6c11babe35437ce1ea8e4893a14d07c8.
Donwload China GIS data from here, unzip, and put in the working directory.
ls -l bou2_4p.*
## -rw-r--r--. 1 xurui1996 xurui1996 86283 Feb 26 09:24 bou2_4p.dbf
## -rw-r--r--. 1 xurui1996 xurui1996 1508752 Feb 26 09:24 bou2_4p.shp
## -rw-r--r--. 1 xurui1996 xurui1996 7500 Feb 26 09:24 bou2_4p.shx
Read in the shape file into simple feature (SF) format. Replace the NA in NAME by Macau.
library(sf)
chn_map <- st_read("./bou2_4p.shp", as_tibble = TRUE) %>%
mutate(NAME = iconv(NAME, from = "GBK"),
BOU2_4M_ = as.integer(BOU2_4M_),
BOU2_4M_ID = as.integer(BOU2_4M_ID)) %>%
mutate(NAME = str_replace_na(NAME, replacement = "澳门特别行政区")) %>%
print()
## Reading layer `bou2_4p' from data source `/home/xurui1996/biostat-203b-2020-winter/hw3/hw3sol/bou2_4p.shp' using driver `ESRI Shapefile'
## Simple feature collection with 925 features and 7 fields
## geometry type: POLYGON
## dimension: XY
## bbox: xmin: 73.44696 ymin: 6.318641 xmax: 135.0858 ymax: 53.55793
## epsg (SRID): NA
## proj4string: NA
## Simple feature collection with 925 features and 7 fields
## geometry type: POLYGON
## dimension: XY
## bbox: xmin: 73.44696 ymin: 6.318641 xmax: 135.0858 ymax: 53.55793
## epsg (SRID): NA
## proj4string: NA
## # A tibble: 925 x 8
## AREA PERIMETER BOU2_4M_ BOU2_4M_ID ADCODE93 ADCODE99 NAME
## * <dbl> <dbl> <int> <int> <int> <int> <chr>
## 1 54.4 68.5 2 23 230000 230000 黑龙江省…
## 2 129. 130. 3 15 150000 150000 内蒙古自…
## 3 176. 84.9 4 65 650000 650000 新疆维吾…
## 4 21.3 41.2 5 22 220000 220000 吉林省…
## 5 15.6 38.4 6 21 210000 210000 辽宁省…
## 6 41.5 76.8 7 62 620000 620000 甘肃省…
## 7 19.5 44.9 8 13 130000 130000 河北省…
## 8 1.73 8.50 9 11 110000 110000 北京市…
## 9 0 0.088 10 292 210000 210000 辽宁省…
## 10 0 0.047 11 292 210000 210000 辽宁省…
## # … with 915 more rows, and 1 more variable: geometry <POLYGON>
There are about 34 provinces in China, why there are 925 areas? Because one province is composed of n (n >= 1) polygons.
chn_map %>%
count(NAME) %>%
print(n = Inf)
## Simple feature collection with 34 features and 2 fields
## geometry type: GEOMETRY
## dimension: XY
## bbox: xmin: 73.44696 ymin: 6.318641 xmax: 135.0858 ymax: 53.55793
## epsg (SRID): NA
## proj4string: NA
## # A tibble: 34 x 3
## NAME n geometry
## * <chr> <int> <GEOMETRY>
## 1 上海市 12 MULTIPOLYGON (((121.414 30.6952, 121.415 30.69756, 121.41…
## 2 云南省 1 POLYGON ((98.65588 28.83075, 98.65788 28.8333, 98.66165 2…
## 3 内蒙古自治区 1 POLYGON ((121.4884 53.33265, 121.4974 53.32104, 121.5004 …
## 4 北京市 1 POLYGON ((117.496 40.66534, 117.4992 40.64039, 117.4938 4…
## 5 台湾省 57 MULTIPOLYGON (((120.8155 21.75681, 120.8174 21.75765, 120…
## 6 吉林省 1 POLYGON ((123.171 46.24668, 123.2186 46.26945, 123.2394 4…
## 7 四川省 1 POLYGON ((102.447 33.56613, 102.4386 33.57738, 102.4011 3…
## 8 天津市 1 POLYGON ((117.4638 40.24003, 117.4962 40.22717, 117.5416 …
## 9 宁夏回族自治区… 1 POLYGON ((106.8099 39.30777, 106.7962 39.30038, 106.8025 …
## 10 安徽省 1 POLYGON ((116.3686 34.64087, 116.4237 34.65374, 116.432 3…
## 11 山东省 86 MULTIPOLYGON (((120.7183 36.28086, 120.7147 36.28513, 120…
## 12 山西省 1 POLYGON ((114.1232 40.74159, 114.2001 40.65104, 114.1953 …
## 13 广东省 154 MULTIPOLYGON (((109.9309 20.22808, 109.9309 20.22664, 109…
## 14 广西壮族自治区… 6 MULTIPOLYGON (((109.2133 20.89708, 109.2109 20.89759, 109…
## 15 新疆维吾尔自治区… 1 POLYGON ((96.38329 42.72696, 96.35991 42.70969, 96.09664 …
## 16 江苏省 5 MULTIPOLYGON (((119.4393 34.77124, 119.4419 34.77665, 119…
## 17 江西省 1 POLYGON ((116.1283 29.82727, 116.1726 29.83686, 116.2121 …
## 18 河北省 9 MULTIPOLYGON (((117.194 40.08278, 117.1983 40.0796, 117.2…
## 19 河南省 1 POLYGON ((113.7235 36.35595, 113.7516 36.37056, 113.8081 …
## 20 浙江省 179 MULTIPOLYGON (((120.4666 27.16607, 120.468 27.16719, 120.…
## 21 海南省 79 MULTIPOLYGON (((113.2364 6.323439, 113.2353 6.32, 113.233…
## 22 湖北省 1 POLYGON ((109.757 32.91204, 109.7823 33.00224, 109.7863 3…
## 23 湖南省 1 POLYGON ((111.2695 30.01506, 111.2882 30.01272, 111.2936 …
## 24 澳门特别行政区… 1 POLYGON ((114.2259 22.54496, 114.226 22.54444, 114.2263 2…
## 25 甘肃省 1 POLYGON ((96.38329 42.72696, 96.38308 42.72533, 96.39072 …
## 26 福建省 168 MULTIPOLYGON (((119.9835 25.96752, 119.986 25.96805, 119.…
## 27 西藏自治区 1 POLYGON ((79.03262 34.33298, 79.07736 34.41547, 79.15108 …
## 28 贵州省 2 MULTIPOLYGON (((109.5375 26.75003, 109.571 26.76731, 109.…
## 29 辽宁省 94 MULTIPOLYGON (((121.7979 40.99618, 121.7931 40.98311, 121…
## 30 重庆市 1 POLYGON ((108.5145 32.21204, 108.5308 32.195, 108.5808 32…
## 31 陕西省 1 POLYGON ((111.1508 39.54331, 111.1104 39.50884, 111.1047 …
## 32 青海省 1 POLYGON ((96.98931 38.82063, 96.98743 38.8617, 96.95775 3…
## 33 香港特别行政区… 53 MULTIPOLYGON (((113.9175 22.15853, 113.9029 22.15769, 113…
## 34 黑龙江省 1 POLYGON ((121.4884 53.33265, 121.4995 53.33601, 121.5184 …
ggplot2 can plot SF data using geom_sf function:
chn_map %>%
ggplot() +
geom_sf(mapping = aes(geometry = geometry), color = "black", fill = "white") +
#geom_sf_label(mapping = aes(label = NAME)) +
theme_bw() # better for maps
These are the provinces available from the JHU sheets.
ncov_tbl %>%
filter(`Country/Region` %in% c("Mainland China", "Macau",
"Hong Kong", "Taiwan")) %>%
distinct(`Province/State`, `Country/Region`) %>%
print(n = Inf)
## # A tibble: 34 x 2
## `Province/State` `Country/Region`
## <chr> <chr>
## 1 Anhui Mainland China
## 2 Beijing Mainland China
## 3 Chongqing Mainland China
## 4 Fujian Mainland China
## 5 Gansu Mainland China
## 6 Guangdong Mainland China
## 7 Guangxi Mainland China
## 8 Guizhou Mainland China
## 9 Hainan Mainland China
## 10 Hebei Mainland China
## 11 Heilongjiang Mainland China
## 12 Henan Mainland China
## 13 Hubei Mainland China
## 14 Hunan Mainland China
## 15 Inner Mongolia Mainland China
## 16 Jiangsu Mainland China
## 17 Jiangxi Mainland China
## 18 Jilin Mainland China
## 19 Liaoning Mainland China
## 20 Ningxia Mainland China
## 21 Qinghai Mainland China
## 22 Shaanxi Mainland China
## 23 Shandong Mainland China
## 24 Shanghai Mainland China
## 25 Shanxi Mainland China
## 26 Sichuan Mainland China
## 27 Tianjin Mainland China
## 28 Tibet Mainland China
## 29 Xinjiang Mainland China
## 30 Yunnan Mainland China
## 31 Zhejiang Mainland China
## 32 Taiwan Taiwan
## 33 Macau Macau
## 34 Hong Kong Hong Kong
In order to join the tibbles ncov_tbl and chn_map, we need to use the province name as key. Let’s create a function to translate Chinese province name to English.
translate <- function(x) {
sapply(x, function(chn_name) {
if (str_detect(chn_name, "澳门")) {
eng_name <- "Macau"
} else if (str_detect(chn_name, "台湾")) {
eng_name <- "Taiwan"
} else if (str_detect(chn_name, "上海")) {
eng_name <- "Shanghai"
} else if (str_detect(chn_name, "云南")) {
eng_name <- "Yunnan"
} else if (str_detect(chn_name, "内蒙古")) {
eng_name <- "Inner Mongolia"
} else if (str_detect(chn_name, "北京")) {
eng_name <- "Beijing"
} else if (str_detect(chn_name, "台湾")) {
eng_name <- "Taiwan"
} else if (str_detect(chn_name, "吉林")) {
eng_name <- "Jilin"
} else if (str_detect(chn_name, "四川")) {
eng_name <- "Sichuan"
} else if (str_detect(chn_name, "天津")) {
eng_name <- "Tianjin"
} else if (str_detect(chn_name, "宁夏")) {
eng_name <- "Ningxia"
} else if (str_detect(chn_name, "安徽")) {
eng_name <- "Anhui"
} else if (str_detect(chn_name, "山东")) {
eng_name <- "Shandong"
} else if (str_detect(chn_name, "山西")) {
eng_name <- "Shanxi"
} else if (str_detect(chn_name, "广东")) {
eng_name <- "Guangdong"
} else if (str_detect(chn_name, "广西")) {
eng_name <- "Guangxi"
} else if (str_detect(chn_name, "新疆")) {
eng_name <- "Xinjiang"
} else if (str_detect(chn_name, "江苏")) {
eng_name <- "Jiangsu"
} else if (str_detect(chn_name, "江西")) {
eng_name <- "Jiangxi"
} else if (str_detect(chn_name, "河北")) {
eng_name <- "Hebei"
} else if (str_detect(chn_name, "河南")) {
eng_name <- "Henan"
} else if (str_detect(chn_name, "浙江")) {
eng_name <- "Zhejiang"
} else if (str_detect(chn_name, "海南")) {
eng_name <- "Hainan"
} else if (str_detect(chn_name, "湖北")) {
eng_name <- "Hubei"
} else if (str_detect(chn_name, "湖南")) {
eng_name <- "Hunan"
} else if (str_detect(chn_name, "甘肃")) {
eng_name <- "Gansu"
} else if (str_detect(chn_name, "福建")) {
eng_name <- "Fujian"
} else if (str_detect(chn_name, "西藏")) {
eng_name <- "Tibet"
} else if (str_detect(chn_name, "贵州")) {
eng_name <- "Guizhou"
} else if (str_detect(chn_name, "辽宁")) {
eng_name <- "Liaoning"
} else if (str_detect(chn_name, "重庆")) {
eng_name <- "Chongqing"
} else if (str_detect(chn_name, "陕西")) {
eng_name <- "Shanxi"
} else if (str_detect(chn_name, "青海")) {
eng_name <- "Qinghai"
} else if (str_detect(chn_name, "香港")) {
eng_name <- "Hong Kong"
} else if (str_detect(chn_name, "黑龙江")) {
eng_name <- "Heilongjiang"
} else {
eng_name <- chn_name # don't translate if no correspondence
}
return(eng_name)
})
}
Create a new variable NAME_ENG:
chn_prov <- chn_map %>%
count(NAME) %>%
mutate(NAME_ENG = translate(NAME)) # translate function is vectorized
chn_prov %>% print(n = Inf)
## Simple feature collection with 34 features and 3 fields
## geometry type: GEOMETRY
## dimension: XY
## bbox: xmin: 73.44696 ymin: 6.318641 xmax: 135.0858 ymax: 53.55793
## epsg (SRID): NA
## proj4string: NA
## # A tibble: 34 x 4
## NAME n geometry NAME_ENG
## * <chr> <int> <GEOMETRY> <chr>
## 1 上海市 12 MULTIPOLYGON (((121.414 30.6952, 121.415 30.6975… Shanghai
## 2 云南省 1 POLYGON ((98.65588 28.83075, 98.65788 28.8333, 9… Yunnan
## 3 内蒙古自治区… 1 POLYGON ((121.4884 53.33265, 121.4974 53.32104, … Inner Mon…
## 4 北京市 1 POLYGON ((117.496 40.66534, 117.4992 40.64039, 1… Beijing
## 5 台湾省 57 MULTIPOLYGON (((120.8155 21.75681, 120.8174 21.7… Taiwan
## 6 吉林省 1 POLYGON ((123.171 46.24668, 123.2186 46.26945, 1… Jilin
## 7 四川省 1 POLYGON ((102.447 33.56613, 102.4386 33.57738, 1… Sichuan
## 8 天津市 1 POLYGON ((117.4638 40.24003, 117.4962 40.22717, … Tianjin
## 9 宁夏回族自治区… 1 POLYGON ((106.8099 39.30777, 106.7962 39.30038, … Ningxia
## 10 安徽省 1 POLYGON ((116.3686 34.64087, 116.4237 34.65374, … Anhui
## 11 山东省 86 MULTIPOLYGON (((120.7183 36.28086, 120.7147 36.2… Shandong
## 12 山西省 1 POLYGON ((114.1232 40.74159, 114.2001 40.65104, … Shanxi
## 13 广东省 154 MULTIPOLYGON (((109.9309 20.22808, 109.9309 20.2… Guangdong
## 14 广西壮族自治区… 6 MULTIPOLYGON (((109.2133 20.89708, 109.2109 20.8… Guangxi
## 15 新疆维吾尔自治区… 1 POLYGON ((96.38329 42.72696, 96.35991 42.70969, … Xinjiang
## 16 江苏省 5 MULTIPOLYGON (((119.4393 34.77124, 119.4419 34.7… Jiangsu
## 17 江西省 1 POLYGON ((116.1283 29.82727, 116.1726 29.83686, … Jiangxi
## 18 河北省 9 MULTIPOLYGON (((117.194 40.08278, 117.1983 40.07… Hebei
## 19 河南省 1 POLYGON ((113.7235 36.35595, 113.7516 36.37056, … Henan
## 20 浙江省 179 MULTIPOLYGON (((120.4666 27.16607, 120.468 27.16… Zhejiang
## 21 海南省 79 MULTIPOLYGON (((113.2364 6.323439, 113.2353 6.32… Hainan
## 22 湖北省 1 POLYGON ((109.757 32.91204, 109.7823 33.00224, 1… Hubei
## 23 湖南省 1 POLYGON ((111.2695 30.01506, 111.2882 30.01272, … Hunan
## 24 澳门特别行政区… 1 POLYGON ((114.2259 22.54496, 114.226 22.54444, 1… Macau
## 25 甘肃省 1 POLYGON ((96.38329 42.72696, 96.38308 42.72533, … Gansu
## 26 福建省 168 MULTIPOLYGON (((119.9835 25.96752, 119.986 25.96… Fujian
## 27 西藏自治区 1 POLYGON ((79.03262 34.33298, 79.07736 34.41547, … Tibet
## 28 贵州省 2 MULTIPOLYGON (((109.5375 26.75003, 109.571 26.76… Guizhou
## 29 辽宁省 94 MULTIPOLYGON (((121.7979 40.99618, 121.7931 40.9… Liaoning
## 30 重庆市 1 POLYGON ((108.5145 32.21204, 108.5308 32.195, 10… Chongqing
## 31 陕西省 1 POLYGON ((111.1508 39.54331, 111.1104 39.50884, … Shanxi
## 32 青海省 1 POLYGON ((96.98931 38.82063, 96.98743 38.8617, 9… Qinghai
## 33 香港特别行政区… 53 MULTIPOLYGON (((113.9175 22.15853, 113.9029 22.1… Hong Kong
## 34 黑龙江省 1 POLYGON ((121.4884 53.33265, 121.4995 53.33601, … Heilongji…
# prepare China map data for shiny app
saveRDS(chn_prov, file = "../2019-20Coronavirus/chn_prov.rds")
The package maps contains a data world for the world map. ggplot2 can plot the world map data using geom_polygon function.
countries <- map_data("world") %>% as_tibble()
ggplot(countries, aes(long, lat, group = group)) +
geom_polygon(fill = "white", colour = "black") +
coord_quickmap()
In order to join the tibbles ncov_tbl and countries, we need to make the names of countries in these two tibbles consistent. Let’s create a function to realize this requirement.
translate2 <- function(x) {
sapply(x, function(name) {
if (str_detect(name, "China")) {
# note here the Mainland China represents the whole China
new_name <- "Mainland China"
} else if (str_detect(name, "Taiwan")) {
new_name <- "Mainland China"
} else if (str_detect(name, "USA")) {
new_name <- "US"
} else {
new_name <- name
}
})
}
countries <- countries %>%
mutate(new_name = translate2(`region`))
countries
## # A tibble: 99,338 x 7
## long lat group order region subregion new_name
## <dbl> <dbl> <dbl> <int> <chr> <chr> <chr>
## 1 -69.9 12.5 1 1 Aruba <NA> Aruba
## 2 -69.9 12.4 1 2 Aruba <NA> Aruba
## 3 -69.9 12.4 1 3 Aruba <NA> Aruba
## 4 -70.0 12.5 1 4 Aruba <NA> Aruba
## 5 -70.1 12.5 1 5 Aruba <NA> Aruba
## 6 -70.1 12.6 1 6 Aruba <NA> Aruba
## 7 -70.0 12.6 1 7 Aruba <NA> Aruba
## 8 -70.0 12.6 1 8 Aruba <NA> Aruba
## 9 -69.9 12.5 1 9 Aruba <NA> Aruba
## 10 -69.9 12.5 1 10 Aruba <NA> Aruba
## # … with 99,328 more rows
# prepare the world map data for shiny app
saveRDS(countries, file = "../2019-20Coronavirus/countries.rds")
Try to join the virus data ncov_tbl and map data chn_prov.
# for exploration
plotdate <- "2020-02-14"
case <- "confirmed"
plotaday <- ncov_tbl %>%
filter(`Country/Region` %in% c("Mainland China", "Macau",
"Hong Kong", "Taiwan")) %>%
filter(Date == plotdate, Case == case) %>%
group_by(`Province/State`) %>%
top_n(1, Date) %>%
right_join(chn_prov, by = c("Province/State" = "NAME_ENG"))
## Warning: Column `Province/State`/`NAME_ENG` has different attributes on LHS and
## RHS of join
Plot confirmed cases on a specific date:
plotaday %>%
ggplot() +
geom_sf(mapping = aes(fill = Count, geometry = geometry)) +
# scale_fill_gradient(low = "white",
# high = "red",
# trans = "log10",
# limits = c(1, 50000),
# breaks = c(1, 10, 100, 1000, 10000),
# name = "") +
scale_fill_gradientn(colors = wes_palette("Zissou1", 100,
type = "continuous"), trans = "log10") +
# #scale_fill_brewer(palette = "Dark2") +
theme_bw() +
labs(title = str_c(case, " cases"), subtitle = plotdate)
Counts by province on a specific date.
date <- "2020-02-18"
ncov_tbl %>%
filter(`Country/Region` %in% c("Mainland China", "Macau",
"Hong Kong", "Taiwan"),
`Date` == date) %>%
group_by(`Province/State`) %>%
ggplot() +
geom_col(mapping = aes(x = `Province/State`, y = `Count`, fill = `Case`)) +
scale_y_log10() +
labs(title = date) +
theme(axis.text.x = element_text(angle = 90))
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 11 rows containing missing values (geom_col).
To plot the line graph of confirmed cases over time:
ncov_tbl %>%
filter(`Country/Region` %in% c("Mainland China", "Macau",
"Hong Kong", "Taiwan")) %>%
group_by(Date, Case) %>%
summarise(total_count = sum(Count)) %>%
# print()
ggplot() +
geom_line(mapping = aes(x = Date, y = total_count, color = Case), size = 2) +
scale_color_manual(values = c("blue", "black", "green")) +
scale_y_log10() +
labs(y = "Count") +
theme_bw()
Resources about making animations in R:
- gganimate package.
- Section 8.3 of Geomcomputation with R.
Plot the date at all time points (this takes long, a couple minutes), make animation and save as gif (this takes long, a couple minutes):
for (case in c("confirmed", "recovered", "death")) {
(p <- ncov_tbl %>%
filter(`Country/Region` %in% c("Mainland China", "Macau",
"Hong Kong", "Taiwan")) %>%
filter(Case == case) %>%
right_join(chn_prov, by = c("Province/State" = "NAME_ENG")) %>%
ggplot() +
geom_sf(mapping = aes(fill = Count, geometry = geometry)) +
# scale_fill_gradient(low = "white",
# high = "red",
# trans = "log10",
# limits = c(1, 100000),
# breaks = c(1, 10, 100, 1000, 10000),
# name = "") +
scale_fill_gradientn(colours = wes_palette("Zissou1", 100,
type = "continuous"), trans = "log10") +
theme_bw() +
labs(title = str_c(case, " cases")))
# make animation and save as gif
(anim <- p +
transition_time(Date) +
labs(title = str_c(case, " cases"), subtitle = "Date: {frame_time}"))
animate(anim, renderer = gifski_renderer())
anim_save(str_c(case, "_anim.gif"))
}
## Warning: Column `Province/State`/`NAME_ENG` has different attributes on LHS and
## RHS of join
## Warning: Transformation introduced infinite values in discrete y-axis
## Warning: Column `Province/State`/`NAME_ENG` has different attributes on LHS and
## RHS of join
## Warning: Transformation introduced infinite values in discrete y-axis
## Warning: Column `Province/State`/`NAME_ENG` has different attributes on LHS and
## RHS of join
## Warning: Transformation introduced infinite values in discrete y-axis
p <- ncov_tbl %>%
filter(`Country/Region` %in% c("Mainland China", "Macau",
"Hong Kong", "Taiwan")) %>%
group_by(`Province/State`) %>%
ggplot() +
geom_col(mapping = aes(x = `Province/State`, y = `Count`, fill = `Case`)) +
scale_y_log10() +
labs(title = date) +
theme(axis.text.x = element_text(angle = 90))
# make animation and save as gif
(anim <- p +
transition_time(Date) +
labs(title = str_c("data animation"), subtitle = "Date: {frame_time}"))
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 75 rows containing missing values (geom_col).
## Warning: Removed 75 rows containing missing values (geom_col).
## Warning: Removed 75 rows containing missing values (geom_col).
## Warning: Removed 75 rows containing missing values (geom_col).
## Warning: Removed 67 rows containing missing values (geom_col).
## Warning: Removed 67 rows containing missing values (geom_col).
## Warning: Removed 67 rows containing missing values (geom_col).
## Warning: Removed 62 rows containing missing values (geom_col).
## Warning: Removed 62 rows containing missing values (geom_col).
## Warning: Removed 62 rows containing missing values (geom_col).
## Warning: Removed 60 rows containing missing values (geom_col).
## Warning: Removed 60 rows containing missing values (geom_col).
## Warning: Removed 60 rows containing missing values (geom_col).
## Warning: Removed 58 rows containing missing values (geom_col).
## Warning: Removed 58 rows containing missing values (geom_col).
## Warning: Removed 58 rows containing missing values (geom_col).
## Warning: Removed 55 rows containing missing values (geom_col).
## Warning: Removed 55 rows containing missing values (geom_col).
## Warning: Removed 54 rows containing missing values (geom_col).
## Warning: Removed 54 rows containing missing values (geom_col).
## Warning: Removed 54 rows containing missing values (geom_col).
## Warning: Removed 45 rows containing missing values (geom_col).
## Warning: Removed 45 rows containing missing values (geom_col).
## Warning: Removed 45 rows containing missing values (geom_col).
## Warning: Removed 41 rows containing missing values (geom_col).
## Warning: Removed 41 rows containing missing values (geom_col).
## Warning: Removed 41 rows containing missing values (geom_col).
## Warning: Removed 39 rows containing missing values (geom_col).
## Warning: Removed 39 rows containing missing values (geom_col).
## Warning: Removed 39 rows containing missing values (geom_col).
## Warning: Removed 37 rows containing missing values (geom_col).
## Warning: Removed 37 rows containing missing values (geom_col).
## Warning: Removed 37 rows containing missing values (geom_col).
## Warning: Removed 34 rows containing missing values (geom_col).
## Warning: Removed 34 rows containing missing values (geom_col).
## Warning: Removed 34 rows containing missing values (geom_col).
## Warning: Removed 32 rows containing missing values (geom_col).
## Warning: Removed 32 rows containing missing values (geom_col).
## Warning: Removed 32 rows containing missing values (geom_col).
## Warning: Removed 30 rows containing missing values (geom_col).
## Warning: Removed 30 rows containing missing values (geom_col).
## Warning: Removed 30 rows containing missing values (geom_col).
## Warning: Removed 27 rows containing missing values (geom_col).
## Warning: Removed 27 rows containing missing values (geom_col).
## Warning: Removed 27 rows containing missing values (geom_col).
## Warning: Removed 25 rows containing missing values (geom_col).
## Warning: Removed 25 rows containing missing values (geom_col).
## Warning: Removed 25 rows containing missing values (geom_col).
## Warning: Removed 23 rows containing missing values (geom_col).
## Warning: Removed 23 rows containing missing values (geom_col).
## Warning: Removed 21 rows containing missing values (geom_col).
## Warning: Removed 21 rows containing missing values (geom_col).
## Warning: Removed 21 rows containing missing values (geom_col).
## Warning: Removed 18 rows containing missing values (geom_col).
## Warning: Removed 18 rows containing missing values (geom_col).
## Warning: Removed 18 rows containing missing values (geom_col).
## Warning: Removed 17 rows containing missing values (geom_col).
## Warning: Removed 17 rows containing missing values (geom_col).
## Warning: Removed 17 rows containing missing values (geom_col).
## Warning: Removed 16 rows containing missing values (geom_col).
## Warning: Removed 16 rows containing missing values (geom_col).
## Warning: Removed 16 rows containing missing values (geom_col).
## Warning: Removed 13 rows containing missing values (geom_col).
## Warning: Removed 13 rows containing missing values (geom_col).
## Warning: Removed 13 rows containing missing values (geom_col).
## Warning: Removed 12 rows containing missing values (geom_col).
## Warning: Removed 12 rows containing missing values (geom_col).
## Warning: Removed 12 rows containing missing values (geom_col).
## Warning: Removed 12 rows containing missing values (geom_col).
## Warning: Removed 12 rows containing missing values (geom_col).
## Warning: Removed 12 rows containing missing values (geom_col).
## Warning: Removed 12 rows containing missing values (geom_col).
## Warning: Removed 12 rows containing missing values (geom_col).
## Warning: Removed 12 rows containing missing values (geom_col).
## Warning: Removed 11 rows containing missing values (geom_col).
## Warning: Removed 11 rows containing missing values (geom_col).
## Warning: Removed 11 rows containing missing values (geom_col).
## Warning: Removed 11 rows containing missing values (geom_col).
## Warning: Removed 11 rows containing missing values (geom_col).
## Warning: Removed 11 rows containing missing values (geom_col).
## Warning: Removed 11 rows containing missing values (geom_col).
## Warning: Removed 11 rows containing missing values (geom_col).
## Warning: Removed 11 rows containing missing values (geom_col).
## Warning: Removed 10 rows containing missing values (geom_col).
## Warning: Removed 10 rows containing missing values (geom_col).
## Warning: Removed 7 rows containing missing values (geom_col).
## Warning: Removed 7 rows containing missing values (geom_col).
## Warning: Removed 7 rows containing missing values (geom_col).
## Warning: Removed 7 rows containing missing values (geom_col).
## Warning: Removed 7 rows containing missing values (geom_col).
## Warning: Removed 7 rows containing missing values (geom_col).
## Warning: Removed 7 rows containing missing values (geom_col).
## Warning: Removed 7 rows containing missing values (geom_col).
## Warning: Removed 7 rows containing missing values (geom_col).
## Warning: Removed 7 rows containing missing values (geom_col).
## Warning: Removed 7 rows containing missing values (geom_col).
## Warning: Removed 7 rows containing missing values (geom_col).
## Warning: Removed 7 rows containing missing values (geom_col).
## Warning: Removed 7 rows containing missing values (geom_col).
## Warning: Removed 7 rows containing missing values (geom_col).
## Warning: Removed 7 rows containing missing values (geom_col).
animate(anim, renderer = gifski_renderer())
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 75 rows containing missing values (geom_col).
## Warning: Removed 75 rows containing missing values (geom_col).
## Warning: Removed 75 rows containing missing values (geom_col).
## Warning: Removed 75 rows containing missing values (geom_col).
## Warning: Removed 67 rows containing missing values (geom_col).
## Warning: Removed 67 rows containing missing values (geom_col).
## Warning: Removed 67 rows containing missing values (geom_col).
## Warning: Removed 62 rows containing missing values (geom_col).
## Warning: Removed 62 rows containing missing values (geom_col).
## Warning: Removed 62 rows containing missing values (geom_col).
## Warning: Removed 60 rows containing missing values (geom_col).
## Warning: Removed 60 rows containing missing values (geom_col).
## Warning: Removed 60 rows containing missing values (geom_col).
## Warning: Removed 58 rows containing missing values (geom_col).
## Warning: Removed 58 rows containing missing values (geom_col).
## Warning: Removed 58 rows containing missing values (geom_col).
## Warning: Removed 55 rows containing missing values (geom_col).
## Warning: Removed 55 rows containing missing values (geom_col).
## Warning: Removed 54 rows containing missing values (geom_col).
## Warning: Removed 54 rows containing missing values (geom_col).
## Warning: Removed 54 rows containing missing values (geom_col).
## Warning: Removed 45 rows containing missing values (geom_col).
## Warning: Removed 45 rows containing missing values (geom_col).
## Warning: Removed 45 rows containing missing values (geom_col).
## Warning: Removed 41 rows containing missing values (geom_col).
## Warning: Removed 41 rows containing missing values (geom_col).
## Warning: Removed 41 rows containing missing values (geom_col).
## Warning: Removed 39 rows containing missing values (geom_col).
## Warning: Removed 39 rows containing missing values (geom_col).
## Warning: Removed 39 rows containing missing values (geom_col).
## Warning: Removed 37 rows containing missing values (geom_col).
## Warning: Removed 37 rows containing missing values (geom_col).
## Warning: Removed 37 rows containing missing values (geom_col).
## Warning: Removed 34 rows containing missing values (geom_col).
## Warning: Removed 34 rows containing missing values (geom_col).
## Warning: Removed 34 rows containing missing values (geom_col).
## Warning: Removed 32 rows containing missing values (geom_col).
## Warning: Removed 32 rows containing missing values (geom_col).
## Warning: Removed 32 rows containing missing values (geom_col).
## Warning: Removed 30 rows containing missing values (geom_col).
## Warning: Removed 30 rows containing missing values (geom_col).
## Warning: Removed 30 rows containing missing values (geom_col).
## Warning: Removed 27 rows containing missing values (geom_col).
## Warning: Removed 27 rows containing missing values (geom_col).
## Warning: Removed 27 rows containing missing values (geom_col).
## Warning: Removed 25 rows containing missing values (geom_col).
## Warning: Removed 25 rows containing missing values (geom_col).
## Warning: Removed 25 rows containing missing values (geom_col).
## Warning: Removed 23 rows containing missing values (geom_col).
## Warning: Removed 23 rows containing missing values (geom_col).
## Warning: Removed 21 rows containing missing values (geom_col).
## Warning: Removed 21 rows containing missing values (geom_col).
## Warning: Removed 21 rows containing missing values (geom_col).
## Warning: Removed 18 rows containing missing values (geom_col).
## Warning: Removed 18 rows containing missing values (geom_col).
## Warning: Removed 18 rows containing missing values (geom_col).
## Warning: Removed 17 rows containing missing values (geom_col).
## Warning: Removed 17 rows containing missing values (geom_col).
## Warning: Removed 17 rows containing missing values (geom_col).
## Warning: Removed 16 rows containing missing values (geom_col).
## Warning: Removed 16 rows containing missing values (geom_col).
## Warning: Removed 16 rows containing missing values (geom_col).
## Warning: Removed 13 rows containing missing values (geom_col).
## Warning: Removed 13 rows containing missing values (geom_col).
## Warning: Removed 13 rows containing missing values (geom_col).
## Warning: Removed 12 rows containing missing values (geom_col).
## Warning: Removed 12 rows containing missing values (geom_col).
## Warning: Removed 12 rows containing missing values (geom_col).
## Warning: Removed 12 rows containing missing values (geom_col).
## Warning: Removed 12 rows containing missing values (geom_col).
## Warning: Removed 12 rows containing missing values (geom_col).
## Warning: Removed 12 rows containing missing values (geom_col).
## Warning: Removed 12 rows containing missing values (geom_col).
## Warning: Removed 12 rows containing missing values (geom_col).
## Warning: Removed 11 rows containing missing values (geom_col).
## Warning: Removed 11 rows containing missing values (geom_col).
## Warning: Removed 11 rows containing missing values (geom_col).
## Warning: Removed 11 rows containing missing values (geom_col).
## Warning: Removed 11 rows containing missing values (geom_col).
## Warning: Removed 11 rows containing missing values (geom_col).
## Warning: Removed 11 rows containing missing values (geom_col).
## Warning: Removed 11 rows containing missing values (geom_col).
## Warning: Removed 11 rows containing missing values (geom_col).
## Warning: Removed 10 rows containing missing values (geom_col).
## Warning: Removed 10 rows containing missing values (geom_col).
## Warning: Removed 7 rows containing missing values (geom_col).
## Warning: Removed 7 rows containing missing values (geom_col).
## Warning: Removed 7 rows containing missing values (geom_col).
## Warning: Removed 7 rows containing missing values (geom_col).
## Warning: Removed 7 rows containing missing values (geom_col).
## Warning: Removed 7 rows containing missing values (geom_col).
## Warning: Removed 7 rows containing missing values (geom_col).
## Warning: Removed 7 rows containing missing values (geom_col).
## Warning: Removed 7 rows containing missing values (geom_col).
## Warning: Removed 7 rows containing missing values (geom_col).
## Warning: Removed 7 rows containing missing values (geom_col).
## Warning: Removed 7 rows containing missing values (geom_col).
## Warning: Removed 7 rows containing missing values (geom_col).
## Warning: Removed 7 rows containing missing values (geom_col).
## Warning: Removed 7 rows containing missing values (geom_col).
## Warning: Removed 7 rows containing missing values (geom_col).
anim_save(str_c("data_anim.gif"))
plotdate <- "2020-02-14"
case <- "confirmed"
(info <- ncov_tbl %>%
filter(Date == plotdate, Case == case) %>%
group_by(`Country/Region`) %>%
top_n(1, Date)) # take the latest count on that date
## # A tibble: 94 x 7
## # Groups: Country/Region [42]
## `Province/State` `Country/Region` Lat Long Date Case Count
## <chr> <chr> <dbl> <dbl> <date> <chr> <dbl>
## 1 Anhui Mainland China 31.8 117. 2020-02-14 confirmed 934
## 2 Beijing Mainland China 40.2 116. 2020-02-14 confirmed 372
## 3 Chongqing Mainland China 30.1 108. 2020-02-14 confirmed 537
## 4 Fujian Mainland China 26.1 118. 2020-02-14 confirmed 281
## 5 Gansu Mainland China 36.1 104. 2020-02-14 confirmed 90
## 6 Guangdong Mainland China 23.3 113. 2020-02-14 confirmed 1261
## 7 Guangxi Mainland China 23.8 109. 2020-02-14 confirmed 226
## 8 Guizhou Mainland China 26.8 107. 2020-02-14 confirmed 140
## 9 Hainan Mainland China 19.2 110. 2020-02-14 confirmed 159
## 10 Hebei Mainland China 38.0 115. 2020-02-14 confirmed 283
## # … with 84 more rows
China <- info %>%
filter(`Country/Region` %in% c("Mainland China", "Macau",
"Hong Kong", "Taiwan")) %>%
group_by(Case) %>%
mutate(tmp = sum(Count)) %>%
top_n(1, `Province/State`) %>%
select(-`Count`)
colnames(China)[7] <- "Count"
China
## # A tibble: 1 x 7
## # Groups: Case [1]
## `Province/State` `Country/Region` Lat Long Date Case Count
## <chr> <chr> <dbl> <dbl> <date> <chr> <dbl>
## 1 Zhejiang Mainland China 29.2 120. 2020-02-14 confirmed 66376
Other <- info %>%
filter(`Country/Region` != "Mainland China", `Country/Region` != "Macau",
`Country/Region` != "Hong Kong", `Country/Region` != "Taiwan",
`Country/Region` != "Others") %>%
mutate(tmp = sum(Count)) %>%
top_n(1, `Province/State`) %>%
select(- `Count`)
colnames(Other)[7] <- "Count"
Other %>% print(n = Inf)
## # A tibble: 5 x 7
## # Groups: Country/Region [5]
## `Province/State` `Country/Region` Lat Long Date Case Count
## <chr> <chr> <dbl> <dbl> <date> <chr> <dbl>
## 1 Toronto, ON Canada 43.7 -79.4 2020-02-14 confi… 7
## 2 Victoria Australia -37.8 145. 2020-02-14 confi… 15
## 3 From Diamond Princess Israel 31.0 34.9 2020-02-14 confi… 0
## 4 Unassigned Location (Fro… US 37.1 -95.7 2020-02-14 confi… 15
## 5 None Austria 47.5 14.6 2020-02-14 confi… 0
newInfo <- bind_rows(China, Other) %>%
print(n = Inf)
## # A tibble: 6 x 7
## # Groups: Case [1]
## `Province/State` `Country/Region` Lat Long Date Case Count
## <chr> <chr> <dbl> <dbl> <date> <chr> <dbl>
## 1 Zhejiang Mainland China 29.2 120. 2020-02-14 confi… 66376
## 2 Toronto, ON Canada 43.7 -79.4 2020-02-14 confi… 7
## 3 Victoria Australia -37.8 145. 2020-02-14 confi… 15
## 4 From Diamond Princess Israel 31.0 34.9 2020-02-14 confi… 0
## 5 Unassigned Location (Fro… US 37.1 -95.7 2020-02-14 confi… 15
## 6 None Austria 47.5 14.6 2020-02-14 confi… 0
Try to join the virus data newInfo and map data countries.
map <- newInfo %>%
left_join(countries, by = c("Country/Region" = "new_name"))
## Warning: Column `Country/Region`/`new_name` has different attributes on LHS and
## RHS of join
Plot confirmed cases on a specific date:
ggplot(map, aes(long, lat, group = group, color = Count)) +
geom_polygon(aes(fill = Count)) +
# scale_fill_gradientn(colours = wes_palette("Zissou1", 100,
# type = "continuous"), trans = "log10") +
scale_fill_gradientn(
colours = c("#461863", "#404E88", "#2A8A8C", "#7FD157", "#F9E53F"),
values = scales::rescale(c(5, 10, 20, 100, 100000)),
labels = scales::comma,
breaks = c(5, 10, 20, 100, 100000)
) +
guides(fill = guide_legend(reverse = T)) +
theme_bw() +
labs(title = str_c(case, " cases")) +
coord_quickmap()
To plot the line graph of confirmed cases over time.
ncov_tbl %>%
group_by(Date, Case) %>%
summarise(total_count = sum(Count)) %>%
# print()
ggplot() +
geom_line(mapping = aes(x = Date, y = total_count, color = Case), size = 2) +
scale_color_manual(values = c("blue", "black", "green")) +
scale_y_log10() +
labs(y = "Count") +
theme_bw()
library(quantmod)
stock <- getSymbols("^HSI", # S&P 500 (^GSPC), Dow Jones (^DJI), NASDAQ (^IXIC), Russell 2000 (^RUT), FTSE 100 (^FTSE), Nikkei 225 (^N225), HANG SENG INDEX (^HSI)
src = "yahoo",
auto.assign = FALSE,
from = min(ncov_tbl$Date),
to = max(ncov_tbl$Date)) %>%
as_tibble(rownames = "Date") %>%
mutate(Date = date(Date)) %>%
ggplot() +
geom_line(mapping = aes(x = Date, y = HSI.Adjusted)) +
theme_bw()
## 'getSymbols' currently uses auto.assign=TRUE by default, but will
## use auto.assign=FALSE in 0.5-0. You will still be able to use
## 'loadSymbols' to automatically load data. getOption("getSymbols.env")
## and getOption("getSymbols.auto.assign") will still be checked for
## alternate defaults.
##
## This message is shown once per session and may be disabled by setting
## options("getSymbols.warning4.0"=FALSE). See ?getSymbols for details.
## Warning: 'indexClass<-' is deprecated.
## Use 'tclass<-' instead.
## See help("Deprecated") and help("xts-deprecated").
## Warning: Calling `as_tibble()` on a vector is discouraged, because the behavior is likely to change in the future. Use `tibble::enframe(name = NULL)` instead.
## This warning is displayed once per session.
stock
# chartSeries(stock, theme = chartTheme("white"),
# type = "line", log.scale = FALSE, TA = NULL)